# setwd("~/Dropbox/T&G project survey/Ukraine/replication_JOP")
# libraries
library(tidyverse)
library(cobalt)
library(patchwork)
library(paletteer)
library(MatchIt)
library(estimatr)
library(lmtest)
library(stargazer)
library(xtable)
source("func.R")

# ggplot defaults: geoms theme
update_geom_defaults("text", list(family = "Archivo Narrow"))
update_geom_defaults("label", list(family = "Archivo Narrow"))
theme_set(theme_nice())

## Load data, limit date
df = read_rds("data/clean_data.rds") %>%
  filter(post == 0 | date <= as.Date("2022-03-07"))

## Dictionary
dict = tribble(~dirty, ~clean,
  "sexo_2", "Sex (female)",
  "edad", "Age",
  "edu", "Education",
  "clase_social_r", "Social class\n(scale)",
  "hab_r_2", "Location\n(over/below 10,000 residents)",
  "educacion_r", "Education\n(scale)",
  "Q12_2", "Voted in last elections",
  "Q14", "Income\n(scale)",
  "ideo", "Ideology\n(scale)",
  "imp_democracy", "Democracy important",
  "trust_EU", "Trust in EU",
  "trust_natgov", "Trust in national government",
  "trust_army", "Trust in army",
  "trust_police", "Trust in police",
  "Q35", "Increase taxes",
  "Q36", "Increase spending",
  "Q38", "Reduce inequality",
  "AP_p", "Polarization (party)",
  "AP_v", "Polarization (voter)",
  "Q60", "Spanish identification",
  "Q61", "Regional identification",
  "distance", "Propensity score\n(distance)")

# Matching variables
mvars = c("edad", "Q14", "ideo", "sexo", "clase_social_r", "educacion_r")

## matching ---------------------------------------------------------------

# full matching; estimand is ATC since we want to model what would
# have happened to controls had they been exposed to invasion
f = as.formula(paste0("post ~ ", paste(mvars, collapse = "+")))
match = matchit(f, data = df, method = "full", estimand = "ATC")

# Balance tables
bal_tab = rbind(
  matrix(rep(NA, 3), ncol=3),
  summary(match)$sum.all[, 1:3],
  matrix(rep(NA, 3), ncol=3),
  summary(match)$sum.matched[, 1:3]) %>%
  as.data.frame %>%
  mutate(var = gsub("\\.1", "", rownames(.))) %>%
  mutate(var = recode(var,
  "distance" = "Distance (propensity score)",
  "ideo" = "Ideology",
  "sexo" = "Sex",
  "edad" = "Age",
  "edu" = "Education",
  "clase_social_r" = "Social class",
  "educacion_r" = "Education",
  "Q14" = "Income")) %>%
  select(var, 1:3)

c = "Balance table"
l = "tab:balance"
bal_tab_tex = str_split(print(xtable(bal_tab, caption = c, label = l),
  include.rownames = FALSE, caption.placement = "top"), "\n")[[1]]
bal_tab_tex = gsub("X &  &  &  ", "\\\\textbf{Original dataset}&&&", bal_tab_tex)
bal_tab_tex[grepl("Original", bal_tab_tex)][2] = gsub("Original", "Matched",
  bal_tab_tex[grepl("Original", bal_tab_tex)][2])
bal_tab_tex = gsub("var ", "", bal_tab_tex)

filecon = file("tables/tab_balance.tex")
writeLines(bal_tab_tex, filecon)
close(filecon)


# Balance plot
pdat = love.plot(match, stats = c("m"),
    thresholds = c(m = .1))$data %>%
  tibble() %>%
  left_join(dict, by = c("var" = "dirty")) %>%
  mutate(clean = factor(clean),
    clean = fct_relevel(clean, "Propensity score\n(distance)", after = Inf))

pdat_plot = pdat %>%
  ggplot(aes(y = clean, x = stat, color = Sample)) +
  geom_line(size = 1, color = "black", alpha = .8) +
  geom_point(size = 2.2) +
  geom_vline(xintercept = -.1, lty = 2) +
  geom_vline(xintercept = .1, lty = 2) +
  coord_cartesian(xlim = c(-1, 1)) +
  labs(x = "Standardized mean differences\n(post-invasion - pre-invasion)",
       y = NULL,
       color = NULL) +
  theme(legend.position = "top") +
  scale_color_paletteer_d(`"wesanderson::Darjeeling1"`, direction = -1)
# Save
ggsave("figures/matched_balance.pdf", height = 5, width = 5, device = cairo_pdf)

## MAIN MODELS --------------------------------------------------------------

# Get data and join with outcomes
matched_data = match.data(match)

## Main Results on national and regional ID

m_natID = lm_robust(Q60 ~ post, data = matched_data, weights = weights,
  clusters = subclass, se_type = "stata")
m_regID = lm_robust(Q61 ~ post, data = matched_data, weights = weights,
  clusters = subclass, se_type = "stata")

m_coefs = rbind(
    tidy(m_natID, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(outcome2 = recode(outcome,
    "Q60" = "National\nidentification", "Q61" = "Regional\nidentification")) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

main = ggplot(m_coefs, aes(x = estimate, y = outcome2)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12))
ggsave("figures/main_models.pdf", width = 5, height = 3, device = cairo_pdf)

# Tables
# Models
m_natIDt = lm(Q60 ~ post, data = matched_data, weights = weights)
m_regIDt = lm(Q61 ~ post, data = matched_data, weights = weights)
m_natIDt_covs = lm(Q60 ~ post +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r + factor(ccaa), data = df)
m_regIDt_covs = lm(Q61 ~ post +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r + factor(ccaa), data = df)

# SE list for Stargazer
mse = list(
  starprep(m_natIDt, clusters = matched_data$subclass, se_type = "stata")[[1]],
  summary(m_natIDt_covs)$coefficients[, "Std. Error"],
  starprep(m_regIDt, clusters = matched_data$subclass, se_type = "stata")[[1]],
  summary(m_regIDt_covs)$coefficients[, "Std. Error"])

# Stargazer
mlist = list(m_natIDt, m_natIDt_covs, m_regIDt, m_regIDt_covs)
filecon = file("tables/tab_lm_main.tex")
writeLines(
  stargazer(mlist,
    se = mse,
    keep = c("Constant", "post"),
    label = "tab:lm_main",
    title = "Effect of invasion on national and regional identification",
    order = c("Constant", "post"),
    dep.var.labels = c("National ID", "Regional ID"),
    covariate.labels = c("(Intercept)", "Post-invasion period"),
    omit.stat = c("f", "ser", "n"),
    column.sep.width = "0pt",
    dep.var.caption = "",
    font.size = "small",
    digits = 3,
    digits.extra = 0,
    star.char = c("+", "*", "**", "***"),
    star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
    notes.align = "c",
    align = TRUE,
    no.space = TRUE,
    notes = "\\parbox[t]{0.65\\textwidth}{\\footnotesize \\textit{Note:} $+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001$. Observations in treated group weighted by propensity score, using optimal full matching on age, income, ideology, gender, education, and social class. Models with individual-level covariates include these variables as control, as well as region (CCAA) fixed effects.}",
    notes.label = "",
    add.lines = list(
      c("Indiv-level covariates", rep(c("No", "Yes"), 2)),
      c("Region (CCAA) FE", rep(c("No", "Yes"), 2)),
      c("Matched data", rep(c("Yes", "No"), 2)),
      c("Observations", sapply(mlist, function(x) nobs(x))),
      c("Control", sapply(mlist, function(x) extract(x, "c"))),
      c("Treated", sapply(mlist, function(x) extract(x, "t")))),
    notes.append = FALSE),
  filecon)
close(filecon)

## Results by ideology

# New matched datasets
match_l = matchit(f, data = subset(df, leaning == "Left"), method = "full", estimand = "ATC")
match_r = matchit(f, data = subset(df, leaning == "Right"), method = "full", estimand = "ATC")
matched_data_l = match.data(match_l)
matched_data_r = match.data(match_r)

# Subsamples
m_natID_l = lm_robust(Q60 ~ post, data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_l = lm_robust(Q61 ~ post, data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_natID_r = lm_robust(Q60 ~ post, data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_r = lm_robust(Q61 ~ post, data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")

m_coefs_ideo = rbind(
    tidy(m_natID_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_natID_r, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_r, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(outcome2 = recode(outcome,
    "Q60" = "National\nidentification", "Q61" = "Regional\nidentification")) %>%
  mutate(ideo = rep(c("Leaning left (0-5)", "Leaning right (6-10)"), each = 2)) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

main_ideo = ggplot(m_coefs_ideo, aes(x = estimate, y = outcome2)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12)) +
  facet_wrap(~ideo)
ggsave("figures/main_models_ideology.pdf", width = 7, height = 3, device = cairo_pdf)

# Tables
m_natID_lt = lm(Q60 ~ post, data = matched_data_l, weights = weights)
m_regID_lt = lm(Q61 ~ post, data = matched_data_l, weights = weights)
m_natID_rt = lm(Q60 ~ post, data = matched_data_r, weights = weights)
m_regID_rt = lm(Q61 ~ post, data = matched_data_r, weights = weights)
m_natID_lt_covs = lm(Q60 ~ post +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r + factor(ccaa), data = df)
m_regID_lt_covs = lm(Q61 ~ post +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r + factor(ccaa), data = df)
m_natID_rt_covs = lm(Q60 ~ post +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r + factor(ccaa), data = df)
m_regID_rt_covs = lm(Q61 ~ post +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r + factor(ccaa), data = df)

# SE list for Stargazer
mse = list(
  starprep(m_natID_lt, clusters = matched_data_l$subclass, se_type = "stata")[[1]],
  summary(m_natID_lt_covs)$coefficients[, "Std. Error"],
  starprep(m_natID_rt, clusters = matched_data_r$subclass, se_type = "stata")[[1]],
  summary(m_natID_rt_covs)$coefficients[, "Std. Error"],
  starprep(m_regID_lt, clusters = matched_data_l$subclass, se_type = "stata")[[1]],
  summary(m_regID_lt_covs)$coefficients[, "Std. Error"],
  starprep(m_regID_rt, clusters = matched_data_r$subclass, se_type = "stata")[[1]],
  summary(m_regID_rt_covs)$coefficients[, "Std. Error"]
  )

# Stargazer
mlist = list(m_natID_lt, m_natID_lt_covs, m_natID_rt, m_natID_rt_covs,
  m_regID_lt, m_regID_lt_covs, m_regID_rt, m_regID_rt_covs)
filecon = file("tables/tab_lm_main_ideo.tex")
writeLines(
  stargazer(mlist,
    se = mse,
    keep = c("Constant", "post"),
    label = "tab:lm_main_ideo",
    title = "Effect of invasion on national and regional identification depending on ideology",
    order = c("Constant", "post"),
    covariate.labels = c("(Intercept)", "Post-invasion period"),
    omit.stat = c("f", "ser", "n"),
    column.sep.width = "0pt",
    dep.var.caption = "",
    font.size = "small",
    digits = 3,
    digits.extra = 0,
    star.char = c("+", "*", "**", "***"),
    star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
    notes.align = "c",
    align = TRUE,
    no.space = TRUE,
    dep.var.labels = c("National ID", "Regional ID"),
    column.labels = c("Left", "Right", "Left", "Right"),
    column.separate = rep(2, 4),
    notes = "\\parbox[t]{1\\textwidth}{\\footnotesize \\textit{Note:} $+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001$. Observations in treated group weighted by propensity score, using optimal full matching on age, income, ideology, gender, education, and social class. Models with individual-level covariates include these variables as control, as well as region (CCAA) fixed effects. Left refers to respondents who place themselves between 0 and 5 in the ideological scale, while Right refers to respondents who place themselves between 6 and 10.}",
    notes.label = "",
    add.lines = list(
      c("Indiv-level covariates", rep(c("No", "Yes"), 4)),
      c("Region (CCAA) FE", rep(c("No", "Yes"), 4)),
      c("Matched data", rep(c("Yes", "No"), 4)),
      c("Observations", sapply(mlist, function(x) nobs(x))),
      c("Control", sapply(mlist, function(x) extract(x, "c"))),
      c("Treated", sapply(mlist, function(x) extract(x, "t")))),
    notes.append = FALSE),
  filecon)
close(filecon)

## -----------------------------------------------------------------------------
## Results by media preference (any newspaper vs sport/entertainment)

# New matched datasets
match_prefernews = matchit(f, data = subset(df, prefer_other_media == 0),
  method = "full", estimand = "ATC")
match_preferother = matchit(f, data = subset(df, prefer_other_media == 1),
  method = "full", estimand = "ATC")
matched_data_prefernews = match.data(match_prefernews)
matched_data_preferother = match.data(match_preferother)

# Subsamples
m_natID_prefernews = lm_robust(Q60 ~ post, data = matched_data_prefernews,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_prefernews = lm_robust(Q61 ~ post, data = matched_data_prefernews,
  weights = weights, clusters = subclass, se_type = "stata")
m_natID_preferother = lm_robust(Q60 ~ post, data = matched_data_preferother,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_preferother = lm_robust(Q61 ~ post, data = matched_data_preferother,
  weights = weights, clusters = subclass, se_type = "stata")

m_coefs_mediapref = rbind(
    tidy(m_natID_prefernews, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_prefernews, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_natID_preferother, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_preferother, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(outcome2 = recode(outcome,
    "Q60" = "National\nidentification", "Q61" = "Regional\nidentification")) %>%
  mutate(mediapref = rep(c("Prefer newspapers (n = 1733)",
    "Prefer sports/entertainment media (n = 311)"), each = 2)) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

main_med = ggplot(m_coefs_mediapref, aes(x = estimate, y = outcome2)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12)) +
  facet_wrap(~mediapref)
ggsave("figures/main_models_mediapref.pdf", main_med, width = 7, height = 3, device = cairo_pdf)

## -----------------------------------------------------------------------------
## Results by age (cutoff: 45 years old)

# New matched datasets
match_age1 = matchit(f, data = subset(df, edad %in% 18:40),
  method = "full", estimand = "ATC")
match_age2 = matchit(f, data = subset(df, edad %in% 41:55),
  method = "full", estimand = "ATC")
match_age3 = matchit(f, data = subset(df, edad >= 56),
  method = "full", estimand = "ATC")
matched_data_age1 = match.data(match_age1)
matched_data_age2 = match.data(match_age3)
matched_data_age3 = match.data(match_age2)

# Subsamples
m_natID_age1 = lm_robust(Q60 ~ post, data = matched_data_age1,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_age1 = lm_robust(Q61 ~ post, data = matched_data_age1,
  weights = weights, clusters = subclass, se_type = "stata")
m_natID_age2 = lm_robust(Q60 ~ post, data = matched_data_age2,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_age2 = lm_robust(Q61 ~ post, data = matched_data_age2,
  weights = weights, clusters = subclass, se_type = "stata")
m_natID_age3 = lm_robust(Q60 ~ post, data = matched_data_age3,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_age3 = lm_robust(Q61 ~ post, data = matched_data_age3,
  weights = weights, clusters = subclass, se_type = "stata")

m_coefs_age = rbind(
    tidy(m_natID_age1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_age1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_natID_age2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_age2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_natID_age3, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_age3, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(outcome2 = recode(outcome,
    "Q60" = "National\nidentification", "Q61" = "Regional\nidentification")) %>%
  mutate(age = rep(c("Age 18-40", "Age 41-55", "Age 56+"), each = 2)) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

main_age = ggplot(m_coefs_age, aes(x = estimate, y = outcome2)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12)) +
  facet_wrap(~age)
ggsave("figures/main_models_age.pdf", main_age, width = 7.5, height = 3, device = cairo_pdf)

## -----------------------------------------------------------------------------
## Results by gender

# New matched datasets
match_wom = matchit(f, data = subset(df, sexo == 2),
  method = "full", estimand = "ATC")
match_men = matchit(f, data = subset(df, sexo == 1),
  method = "full", estimand = "ATC")
matched_data_wom = match.data(match_wom)
matched_data_men = match.data(match_men)

# Subsamples
m_natID_wom = lm_robust(Q60 ~ post, data = matched_data_wom,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_wom = lm_robust(Q61 ~ post, data = matched_data_wom,
  weights = weights, clusters = subclass, se_type = "stata")
m_natID_men = lm_robust(Q60 ~ post, data = matched_data_men,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_men = lm_robust(Q61 ~ post, data = matched_data_men,
  weights = weights, clusters = subclass, se_type = "stata")

m_coefs_gender = rbind(
    tidy(m_natID_wom, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_wom, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_natID_men, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_men, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(outcome2 = recode(outcome,
    "Q60" = "National\nidentification", "Q61" = "Regional\nidentification")) %>%
  mutate(gender = rep(c("Women", "Men"), each = 2)) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

main_gender = ggplot(m_coefs_gender, aes(x = estimate, y = outcome2)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12)) +
  facet_wrap(~gender)
ggsave("figures/main_models_gender.pdf", main_gender, width = 7, height = 3, device = cairo_pdf)

#####################################################################################
## NOTE (Sept 28): Falta hacer la tabla para estos modelos, in case we want to do it:
## Heterogenous effects by a) media pref, b) age, c) gender
#####################################################################################

## ALL OUTCOMES --------------------------------------------------------------

m_other1 = lm_robust(imp_democracy ~ post, data = matched_data,
  weights = weights, clusters = subclass, se_type = "stata")
m_other2 = lm_robust(trust_EU ~ post, data = matched_data,
  weights = weights, clusters = subclass, se_type = "stata")
m_other3 = lm_robust(trust_natgov ~ post, data = matched_data,
  weights = weights, clusters = subclass, se_type = "stata")
m_other4 = lm_robust(trust_army ~ post, data = matched_data,
  weights = weights, clusters = subclass, se_type = "stata")
m_other5 = lm_robust(trust_police ~ post, data = matched_data,
  weights = weights, clusters = subclass, se_type = "stata")
m_other6 = lm_robust(Q35 ~ post, data = matched_data,
  weights = weights, clusters = subclass, se_type = "stata")
m_other7 = lm_robust(Q36 ~ post, data = matched_data,
  weights = weights, clusters = subclass, se_type = "stata")
m_other8 = lm_robust(Q38 ~ post, data = matched_data,
  weights = weights, clusters = subclass, se_type = "stata")
# m_other9 = lm_robust(AP_p ~ post, data = matched_data,
#   weights = weights, clusters = subclass, se_type = "stata")

m_coefs_all = rbind(
    tidy(m_other1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other3, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other4, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other5, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other6, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other7, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other8, conf.int = TRUE) %>% filter(term == "post")) %>%
  left_join(dict, by = c("outcome" = "dirty")) %>%
  mutate(clean = recode(clean, "Polarization (party)" = "Affective polarization")) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

all_outs = ggplot(m_coefs_all, aes(x = estimate, y = clean, color = p.value < .05)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12), legend.position = "none") +
  scale_color_manual(values = c("gray", "black"))
ggsave("figures/all_outcomes.pdf", width = 7, height = 3, device = cairo_pdf)

## ALL OUTCOMES by IDEOLOGY -------------------------------------------------------

m_other1_l = lm_robust(imp_democracy ~ post, data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_other1_r = lm_robust(imp_democracy ~ post, data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
m_other2_l = lm_robust(trust_EU ~ post, data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_other2_r = lm_robust(trust_EU ~ post, data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
m_other3_l = lm_robust(trust_natgov ~ post, data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_other3_r = lm_robust(trust_natgov ~ post, data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
m_other4_l = lm_robust(trust_army ~ post, data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_other4_r = lm_robust(trust_army ~ post, data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
m_other5_l = lm_robust(trust_police ~ post, data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_other5_r = lm_robust(trust_police ~ post, data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
m_other6_l = lm_robust(Q35 ~ post, data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_other6_r = lm_robust(Q35 ~ post, data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
m_other7_l = lm_robust(Q36 ~ post, data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_other7_r = lm_robust(Q36 ~ post, data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
m_other8_l = lm_robust(Q38 ~ post, data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_other8_r = lm_robust(Q38 ~ post, data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
# m_other9_l = lm_robust(AP_p ~ post, data = matched_data_l,
#   weights = weights, clusters = subclass, se_type = "stata")
# m_other9_r = lm_robust(AP_p ~ post, data = matched_data_r,
#   weights = weights, clusters = subclass, se_type = "stata")

m_coefs_all_ideo = rbind(
    tidy(m_other1_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other2_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other3_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other4_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other5_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other6_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other7_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other8_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other1_r, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other2_r, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other3_r, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other4_r, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other5_r, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other6_r, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other7_r, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other8_r, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(ideo = rep(c("Leaning left (0-5)", "Leaning right (6-10)"), each = 8)) %>%
  left_join(dict, by = c("outcome" = "dirty")) %>%
  mutate(clean = recode(clean, "Polarization (party)" = "Affective polarization")) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

all_outs_ideo = ggplot(m_coefs_all_ideo,
    aes(x = estimate, y = clean, color = p.value < .05)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12), legend.position = "none") +
  scale_color_manual(values = c("gray", "black")) +
  facet_wrap(~ideo)
ggsave("figures/all_outcomes_ideo.pdf", width = 10, height = 3.5, device = cairo_pdf)

## ALL OUTCOMES by AGE -------------------------------------------------------

m_other1_age1 = lm_robust(imp_democracy ~ post, data = matched_data_age1,
  weights = weights, clusters = subclass, se_type = "stata")
m_other1_age2 = lm_robust(imp_democracy ~ post, data = matched_data_age2,
  weights = weights, clusters = subclass, se_type = "stata")
m_other1_age3 = lm_robust(imp_democracy ~ post, data = matched_data_age3,
  weights = weights, clusters = subclass, se_type = "stata")
m_other2_age1 = lm_robust(trust_EU ~ post, data = matched_data_age1,
  weights = weights, clusters = subclass, se_type = "stata")
m_other2_age2 = lm_robust(trust_EU ~ post, data = matched_data_age2,
  weights = weights, clusters = subclass, se_type = "stata")
m_other2_age3 = lm_robust(trust_EU ~ post, data = matched_data_age3,
  weights = weights, clusters = subclass, se_type = "stata")
m_other3_age1 = lm_robust(trust_natgov ~ post, data = matched_data_age1,
  weights = weights, clusters = subclass, se_type = "stata")
m_other3_age2 = lm_robust(trust_natgov ~ post, data = matched_data_age2,
  weights = weights, clusters = subclass, se_type = "stata")
m_other3_age3 = lm_robust(trust_natgov ~ post, data = matched_data_age3,
  weights = weights, clusters = subclass, se_type = "stata")
m_other4_age1 = lm_robust(trust_army ~ post, data = matched_data_age1,
  weights = weights, clusters = subclass, se_type = "stata")
m_other4_age2 = lm_robust(trust_army ~ post, data = matched_data_age2,
  weights = weights, clusters = subclass, se_type = "stata")
m_other4_age3 = lm_robust(trust_army ~ post, data = matched_data_age3,
  weights = weights, clusters = subclass, se_type = "stata")
m_other5_age1 = lm_robust(trust_police ~ post, data = matched_data_age1,
  weights = weights, clusters = subclass, se_type = "stata")
m_other5_age2 = lm_robust(trust_police ~ post, data = matched_data_age2,
  weights = weights, clusters = subclass, se_type = "stata")
m_other5_age3 = lm_robust(trust_police ~ post, data = matched_data_age3,
  weights = weights, clusters = subclass, se_type = "stata")
m_other6_age1 = lm_robust(Q35 ~ post, data = matched_data_age1,
  weights = weights, clusters = subclass, se_type = "stata")
m_other6_age2 = lm_robust(Q35 ~ post, data = matched_data_age2,
  weights = weights, clusters = subclass, se_type = "stata")
m_other6_age3 = lm_robust(Q35 ~ post, data = matched_data_age3,
  weights = weights, clusters = subclass, se_type = "stata")
m_other7_age1 = lm_robust(Q36 ~ post, data = matched_data_age1,
  weights = weights, clusters = subclass, se_type = "stata")
m_other7_age2 = lm_robust(Q36 ~ post, data = matched_data_age2,
  weights = weights, clusters = subclass, se_type = "stata")
m_other7_age3 = lm_robust(Q36 ~ post, data = matched_data_age3,
  weights = weights, clusters = subclass, se_type = "stata")
m_other8_age1 = lm_robust(Q38 ~ post, data = matched_data_age1,
  weights = weights, clusters = subclass, se_type = "stata")
m_other8_age2 = lm_robust(Q38 ~ post, data = matched_data_age2,
  weights = weights, clusters = subclass, se_type = "stata")
m_other8_age3 = lm_robust(Q38 ~ post, data = matched_data_age3,
  weights = weights, clusters = subclass, se_type = "stata")
# m_other9_age1 = lm_robust(AP_p ~ post, data = matched_data_age1,
#   weights = weights, clusters = subclass, se_type = "stata")
# m_other9_age2 = lm_robust(AP_p ~ post, data = matched_data_age2,
#   weights = weights, clusters = subclass, se_type = "stata")
# m_other9_age3 = lm_robust(AP_p ~ post, data = matched_data_age3,
#   weights = weights, clusters = subclass, se_type = "stata")

m_coefs_all_age = rbind(
    tidy(m_other1_age1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other1_age2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other1_age3, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other2_age1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other2_age2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other2_age3, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other3_age1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other3_age2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other3_age3, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other4_age1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other4_age2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other4_age3, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other5_age1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other5_age2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other5_age3, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other6_age1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other6_age2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other6_age3, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other7_age1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other7_age2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other7_age3, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other8_age1, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other8_age2, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other8_age3, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(age = rep(c("Age 18-40", "Age 41-55", "Age 56+"), 8)) %>%
  left_join(dict, by = c("outcome" = "dirty")) %>%
  mutate(clean = recode(clean, "Polarization (party)" = "Affective polarization")) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

all_outs_age = ggplot(m_coefs_all_age,
    aes(x = estimate, y = clean, color = p.value < .05)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12), legend.position = "none") +
  scale_color_manual(values = c("gray", "black")) +
  facet_wrap(~age)
ggsave("figures/all_outcomes_age.pdf", width = 10.5, height = 3.5, device = cairo_pdf)

## ALL OUTCOMES by GENDER -------------------------------------------------------

m_other1_wom = lm_robust(imp_democracy ~ post, data = matched_data_wom,
  weights = weights, clusters = subclass, se_type = "stata")
m_other1_men = lm_robust(imp_democracy ~ post, data = matched_data_men,
  weights = weights, clusters = subclass, se_type = "stata")
m_other2_wom = lm_robust(trust_EU ~ post, data = matched_data_wom,
  weights = weights, clusters = subclass, se_type = "stata")
m_other2_men = lm_robust(trust_EU ~ post, data = matched_data_men,
  weights = weights, clusters = subclass, se_type = "stata")
m_other3_wom = lm_robust(trust_natgov ~ post, data = matched_data_wom,
  weights = weights, clusters = subclass, se_type = "stata")
m_other3_men = lm_robust(trust_natgov ~ post, data = matched_data_men,
  weights = weights, clusters = subclass, se_type = "stata")
m_other4_wom = lm_robust(trust_army ~ post, data = matched_data_wom,
  weights = weights, clusters = subclass, se_type = "stata")
m_other4_men = lm_robust(trust_army ~ post, data = matched_data_men,
  weights = weights, clusters = subclass, se_type = "stata")
m_other5_wom = lm_robust(trust_police ~ post, data = matched_data_wom,
  weights = weights, clusters = subclass, se_type = "stata")
m_other5_men = lm_robust(trust_police ~ post, data = matched_data_men,
  weights = weights, clusters = subclass, se_type = "stata")
m_other6_wom = lm_robust(Q35 ~ post, data = matched_data_wom,
  weights = weights, clusters = subclass, se_type = "stata")
m_other6_men = lm_robust(Q35 ~ post, data = matched_data_men,
  weights = weights, clusters = subclass, se_type = "stata")
m_other7_wom = lm_robust(Q36 ~ post, data = matched_data_wom,
  weights = weights, clusters = subclass, se_type = "stata")
m_other7_men = lm_robust(Q36 ~ post, data = matched_data_men,
  weights = weights, clusters = subclass, se_type = "stata")
m_other8_wom = lm_robust(Q38 ~ post, data = matched_data_wom,
  weights = weights, clusters = subclass, se_type = "stata")
m_other8_men = lm_robust(Q38 ~ post, data = matched_data_men,
  weights = weights, clusters = subclass, se_type = "stata")
# m_other9_wom = lm_robust(AP_p ~ post, data = matched_data_wom,
#   weights = weights, clusters = subclass, se_type = "stata")
# m_other9_men = lm_robust(AP_p ~ post, data = matched_data_men,
#   weights = weights, clusters = subclass, se_type = "stata")

m_coefs_all_gender = rbind(
    tidy(m_other1_wom, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other2_wom, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other3_wom, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other4_wom, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other5_wom, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other6_wom, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other7_wom, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other8_wom, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other1_men, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other2_men, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other3_men, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other4_men, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other5_men, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other6_men, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other7_men, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_other8_men, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(ideo = rep(c("Women", "Men"), each = 8)) %>%
  left_join(dict, by = c("outcome" = "dirty")) %>%
  mutate(clean = recode(clean, "Polarization (party)" = "Affective polarization")) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

all_outs_gender = ggplot(m_coefs_all_gender,
    aes(x = estimate, y = clean, color = p.value < .05)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12), legend.position = "none") +
  scale_color_manual(values = c("gray", "black")) +
  facet_wrap(~ideo)
ggsave("figures/all_outcomes_gender.pdf", width = 10, height = 3.5, device = cairo_pdf)


## EXCLUDING/ONLY BASQUE COUNTRY & CATALONIA --------------------------------------

# Provinces codes
ec = c(1, 20, 48, 8, 17, 25, 43)

## Matching in each sample
f = as.formula(paste0("post ~ ", paste(mvars, collapse = "+")))
match_EukCat = matchit(f, data = subset(df, provincia %in% ec),
  method = "full", estimand = "ATC")
match_noEukCat = matchit(f, data = subset(df, !provincia %in% ec),
  method = "full", estimand = "ATC")
# Get data and join with outcomes
m_data_EukCat = match.data(match_EukCat)
m_data_noEukCat = match.data(match_noEukCat)


m_natID_EukCat = lm_robust(Q60 ~ post, data = m_data_EukCat,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_EukCat = lm_robust(Q61 ~ post, data = m_data_EukCat,
  weights = weights, clusters = subclass, se_type = "stata")
m_natID_noEukCat = lm_robust(Q60 ~ post, data = m_data_noEukCat,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_noEukCat = lm_robust(Q61 ~ post, data = m_data_noEukCat,
  weights = weights, clusters = subclass, se_type = "stata")

m_coefs_EukCat = rbind(
    tidy(m_natID_EukCat, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_EukCat, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_natID_noEukCat, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_noEukCat, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(outcome2 = recode(outcome,
    "Q60" = "National\nidentification", "Q61" = "Regional\nidentification")) %>%
  mutate(region = rep(c("Catalonia and Basque Country", "Rest of Spain"), each = 2)) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

main_EC = ggplot(m_coefs_EukCat, aes(x = estimate, y = outcome2)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs. Matching within each sample.") +
  theme(axis.text.y = element_text(size = 12)) +
  facet_wrap(~region, scales = "free_x")
ggsave("figures/main_models_EukCat.pdf", width = 7, height = 3, device = cairo_pdf)

# Tables
m_natID_EukCat_t = lm(Q60 ~ post, data = m_data_EukCat, weights = weights)
m_regID_EukCat_t = lm(Q61 ~ post, data = m_data_EukCat, weights = weights)
m_natID_noEukCat_t = lm(Q60 ~ post, data = m_data_noEukCat, weights = weights)
m_regID_noEukCat_t = lm(Q61 ~ post, data = m_data_noEukCat, weights = weights)
m_natID_EukCat_t_covs = lm(Q60 ~ post +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r + factor(ccaa),
  data = subset(df, provincia %in% ec))
m_regID_EukCat_t_covs = lm(Q61 ~ post +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r + factor(ccaa),
  data = subset(df, provincia %in% ec))
m_natID_noEukCat_t_covs = lm(Q60 ~ post +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r + factor(ccaa),
  data = subset(df, !provincia %in% ec))
m_regID_noEukCat_t_covs = lm(Q61 ~ post +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r + factor(ccaa),
  data = subset(df, !provincia %in% ec))

# SE list for Stargazer
mse = list(
  starprep(m_natID_EukCat_t, clusters = m_data_EukCat$subclass, se_type = "stata")[[1]],
  summary(m_natID_EukCat_t_covs)$coefficients[, "Std. Error"],
  starprep(m_regID_EukCat_t, clusters = m_data_EukCat$subclass, se_type = "stata")[[1]],
  summary(m_regID_EukCat_t_covs)$coefficients[, "Std. Error"],
  starprep(m_natID_noEukCat_t, clusters = m_data_noEukCat$subclass, se_type = "stata")[[1]],
  summary(m_natID_noEukCat_t_covs)$coefficients[, "Std. Error"],
  starprep(m_regID_noEukCat_t, clusters = m_data_noEukCat$subclass, se_type = "stata")[[1]],
  summary(m_regID_noEukCat_t_covs)$coefficients[, "Std. Error"]
  )

# Stargazer
mlist = list(m_natID_EukCat_t, m_natID_EukCat_t_covs,
  m_regID_EukCat_t, m_regID_EukCat_t_covs,
  m_natID_noEukCat_t, m_natID_noEukCat_t_covs,
  m_regID_noEukCat_t, m_regID_noEukCat_t_covs)

stg = stargazer(mlist,
  se = mse,
  keep = c("Constant", "post"),
  label = "tab:lm_EukCat",
  title = "Effect of invasion on national and regional identification in Catalonia and Basque Country and rest of Spain",
  order = c("Constant", "post"),
  covariate.labels = c("(Intercept)", "Post-invasion period"),
  omit.stat = c("f", "ser", "n"),
  column.sep.width = "0pt",
  dep.var.caption = "",
  font.size = "small",
  digits = 3,
  digits.extra = 0,
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  notes.align = "c",
  align = TRUE,
  no.space = TRUE,
  dep.var.labels = c("National ID", "Regional ID", "National ID", "Regional ID"),
  column.labels = c("Only Catalonia and Basque Country", "Rest of Spain"),
  column.separate = rep(4, 2),
  notes = "\\parbox[t]{1\\textwidth}{\\footnotesize \\textit{Note:} $+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001$. Observations in treated group weighted by propensity score, using optimal full matching on age, income, ideology, gender, education, and social class. Models with individual-level covariates include these variables as control.}",
  notes.label = "",
  add.lines = list(
    c("Indiv-level covariates", rep(c("No", "Yes"), 4)),
    c("Region (CCAA) FE", rep(c("No", "Yes"), 4)),
    c("Matched data", rep(c("Yes", "No"), 4)),
    c("Observations", sapply(mlist, function(x) nobs(x))),
    c("Control", sapply(mlist, function(x) extract(x, "c"))),
    c("Treated", sapply(mlist, function(x) extract(x, "t")))),
  notes.append = FALSE)
cn = which(grepl("Only Catalonia and Basque Country}", stg))
stg2 = c(stg[1:(cn-2)], stg[cn], paste0("\\cline{2-", length(mlist)+1, "}"),
  stg[cn-1], stg[(cn+1):length(stg)])

filecon = file("tables/tab_lm_EukCat.tex")
writeLines(stg2, filecon)
close(filecon)


## BASQUE COUNTRY & CATALONIA: Nacionalist vs non-nat vote --------------------------------------

# Provinces codes
ec = c(1, 20, 48, 8, 17, 25, 43)

## Matching in each sample
f = as.formula(paste0("post ~ ", paste(mvars, collapse = "+")))
match_nac = matchit(f, data = subset(df, provincia %in% ec & vote_EukCat == 1),
  method = "full", estimand = "ATC")
match_not_nac = matchit(f, data = subset(df, provincia %in% ec & vote_EukCat == 0),
  method = "full", estimand = "ATC")
# Get data and join with outcomes
m_data_nac = match.data(match_nac)
m_data_not_nac = match.data(match_not_nac)

m_natID_nac = lm_robust(Q60 ~ post, data = m_data_nac,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_nac = lm_robust(Q61 ~ post, data = m_data_nac,
  weights = weights, clusters = subclass, se_type = "stata")
m_natID_not_nac = lm_robust(Q60 ~ post, data = m_data_not_nac,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_not_nac = lm_robust(Q61 ~ post, data = m_data_not_nac,
  weights = weights, clusters = subclass, se_type = "stata")

m_coefs_EC_nac = rbind(
    tidy(m_natID_nac, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_nac, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_natID_not_nac, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_regID_not_nac, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(outcome2 = recode(outcome,
    "Q60" = "National\nidentification", "Q61" = "Regional\nidentification")) %>%
  mutate(vote = rep(c("Euk/Cat Nationalist voters", "Rest of voters"), each = 2)) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

main_EC_nac = ggplot(m_coefs_EC_nac, aes(x = estimate, y = outcome2)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs. Matching within each sample.") +
  theme(axis.text.y = element_text(size = 12)) +
  facet_wrap(~vote, scales = "free_x")
ggsave("figures/main_models_nationalist_EukCat.pdf", width = 7, height = 3, device = cairo_pdf)


## EXPLORE CHANGES IN POLARIZATION (PARTY SCORES) --------------------------------------

m_score_left_l = lm_robust(mean_score_left ~ post,
  data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_score_right_l = lm_robust(mean_score_right ~ post,
  data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_score_peri_l = lm_robust(mean_score_periph ~ post,
  data = matched_data_l,
  weights = weights, clusters = subclass, se_type = "stata")
m_score_left_r = lm_robust(mean_score_left ~ post,
  data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
m_score_right_r = lm_robust(mean_score_right ~ post,
  data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")
m_score_peri_r = lm_robust(mean_score_periph ~ post,
  data = matched_data_r,
  weights = weights, clusters = subclass, se_type = "stata")

m_coefs_scores = rbind(
    tidy(m_score_left_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_score_right_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_score_peri_l, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_score_left_r, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_score_right_r, conf.int = TRUE) %>% filter(term == "post"),
    tidy(m_score_peri_r, conf.int = TRUE) %>% filter(term == "post")) %>%
  mutate(outcome2 = recode(outcome,
    "mean_score_left" = "Mean feeling towards\nleftist parties",
    "mean_score_right" = "Mean feeling towards\nrightist parties",
    "mean_score_periph" = "Mean feeling towards\nCatalan/Basque parties")) %>%
  mutate(ideo = rep(c("Leaning left (0-5)", "Leaning right (6-10)"), each = 3)) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

scores_plot = ggplot(m_coefs_scores, aes(x = estimate, y = outcome2)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(x = "\nCoefficient estimate of invasion", y = "",
    subtitle = "Bars indicate 90% and 95% CIs.") +
  theme(axis.text.y = element_text(size = 12)) +
  facet_wrap(~ideo)
ggsave("figures/models_party_scores.pdf", width = 7, height = 3, device = cairo_pdf)
